Introduction

The aim is to characterize the human fetal kidney from the Kidney Cell Atlas. You can find more about the human kidney atlas here: https://www.kidneycellatlas.org/ [1] The rds data can be downloaded from https://datasets.cellxgene.cziscience.com/40ebb8e4-1a25-4a33-b8ff-02d1156e4e9b.rds (direct download link). The Azimuth-compatible reference has been downloaded and created in the R script download-and-create-fetal-kidney-ref.R

Packages

Load required packages in the following chunk, if needed. Do not install packages here; only load them with the library() function.

# Attach the packages used throughout the notebook (order kept as-is so that
# function masking between packages does not change).
library(Seurat)
library(Azimuth)
library(SCpubr)
library(tidyverse)
library(patchwork)

# Seed the random number generator from the notebook parameters.
# NOTE(review): `params` is presumably supplied by the R Markdown YAML header,
# which is not visible in this part of the file -- confirm `seed` is defined.
set.seed(params$seed)

# Raise the size limit for globals handled by the future framework:
# 850000 MiB, i.e. 891289600000 bytes.
options(future.globals.maxSize = 850000 * 1024^2)

Base directories

# Root of the OpenScPCA repository: the enclosing directory that qualifies as
# a git root (the one holding the hidden .git entry)
repository_base <- rprojroot::find_root(criterion = rprojroot::is_git_root)

# Directory of this analysis module inside the repository
module_base <- file.path(
  repository_base,
  "analyses",
  "cell-type-wilms-tumor-06"
)

Input files

The input file is the output of the R script download-and-create-fetal-kidney-ref.R

# Fetal kidney reference object, read from the module's scratch directory
path_to_data <- file.path(module_base, "scratch", "fetal_kidney.rds")

Output file

We will save the results of the differential expression analyses in results/references/, as 00a_marker_compartment_fetal_kidney_Stewart.csv (compartments) and 00a_marker_cell-type_fetal_kidney_Stewart.csv (cell types). The notebook is saved in the notebook/00-reference directory.

# Directory that receives the marker gene tables written by this notebook
path_to_output <- file.path(module_base, "results", "references")

# Create the directory up front: write_csv() does not create missing parent
# directories, so without this the notebook would only fail at the very end,
# after the differential expression analysis has already run.
# Existing directories are left untouched (the warning is silenced).
dir.create(path_to_output, recursive = TRUE, showWarnings = FALSE)

Analysis

Load the reference

# Read the reference object produced by the download/creation script
fetal_kidney <- readRDS(file = path_to_data)

# UMAP coloured and labelled by compartment (legend dropped, labels suffice)
d1 <- do_DimPlot(
  fetal_kidney,
  reduction = "umap",
  dims = c(1, 2),
  group.by = "compartment",
  label = TRUE,
  repel = TRUE
) +
  NoLegend()

# Same UMAP, coloured and labelled by cell type
d2 <- do_DimPlot(
  fetal_kidney,
  reduction = "umap",
  dims = c(1, 2),
  group.by = "cell_type",
  label = TRUE,
  repel = TRUE
) +
  NoLegend()

# Display both panels side by side (patchwork)
d1 | d2

Characterization of compartment and cell types in the reference

Here, we use an unbiased approach to find transcripts that characterize the different compartments and cell types.

This is just to get marker genes of the different populations, in case some could be of interest for the Wilms tumor annotations.

We run DElegate::FindAllMarkers2 to find markers of the different clusters and manually check whether they make sense. DElegate::FindAllMarkers2 is an improved version of Seurat::FindAllMarkers based on a pseudobulk differential expression method. Please check the preprint from Christoph Hafemeister: https://www.biorxiv.org/content/10.1101/2023.03.28.534443v1 and the tool described here: https://github.com/cancerbits/DElegate

Find marker genes for each of the compartments

# Differential expression between compartments (stray trailing comma in the
# original call removed)
de_results <- DElegate::FindAllMarkers2(
  fetal_kidney,
  group_column = "compartment"
)
## Warning in size + sum(size_args, na.rm = FALSE): NAs produced by integer
## overflow

# Filter the most relevant markers using the thresholds from `params`.
# dplyr::filter() drops rows whose condition evaluates to NA, whereas logical
# `[` indexing turns them into all-NA rows that would show up in the table.
# NOTE(review): `params` is presumably defined in the R Markdown YAML header,
# which is not visible here -- confirm the three thresholds exist.
s.markers <- dplyr::filter(
  de_results,
  padj < params$padj_threshold,
  log_fc > params$lfc_threshold,
  rate1 > params$rate1_threshold
)

# Interactive table of the filtered markers with csv/excel export buttons
DT::datatable(
  s.markers,
  caption = "marker genes",
  extensions = "Buttons",
  options = list(dom = "Bfrtip", buttons = c("csv", "excel"))
)

# Select top 5 genes per compartment for heatmap plotting.
# slice_max() replaces the superseded top_n(); like top_n() it keeps ties, and
# it returns rows ordered by group and decreasing log_fc. The result is
# ungrouped so no grouping leaks into later steps.
s.markers <- na.omit(s.markers)
top5 <- s.markers %>%
  group_by(group1) %>%
  slice_max(order_by = log_fc, n = 5) %>%
  ungroup()

# Subset for plotting: downsample to at most 100 cells per compartment and
# scale only the genes shown in the heatmap
Idents(fetal_kidney) <- fetal_kidney$compartment
cells <- WhichCells(fetal_kidney, downsample = 100)
ss <- subset(fetal_kidney, cells = cells)
ss <- ScaleData(ss, features = top5$feature)

# Diverging palette used for the heatmap fill
heatmap_colors <- c(
  "#01665e", "#35978f", "darkslategray3", "#f7f7f7",
  "#fee391", "#fec44f", "#F9AD03"
)

# p1: UMAP by compartment; p2: heatmap of the top markers on the downsampled
# cells; p3: number of cells per compartment
p1 <- SCpubr::do_DimPlot(
  fetal_kidney,
  reduction = "umap",
  group.by = "compartment",
  label = TRUE,
  repel = TRUE
) +
  ggtitle("compartment")
p2 <- DoHeatmap(
  ss,
  features = top5$feature,
  cells = cells,
  group.by = "compartment"
) +
  NoLegend() +
  scale_fill_gradientn(colors = heatmap_colors)
p3 <- ggplot(fetal_kidney@meta.data, aes(compartment, fill = compartment)) +
  geom_bar() +
  NoLegend()

# Assemble the figure: UMAP above the bar plot on the left, heatmap on the right
common_title <- sprintf(
  "Unsupervised clustering %s, %d cells",
  fetal_kidney@meta.data$orig.ident[1],
  ncol(fetal_kidney)
)
print(
  (((p1 / p3) + plot_layout(heights = c(3, 2)) | p2)) +
    plot_layout(widths = c(1, 2)) +
    plot_layout(heights = c(3, 1)) +
    plot_annotation(title = common_title)
)

# Save the complete (unfiltered) differential expression table
write_csv(
  de_results,
  file = file.path(
    path_to_output,
    "00a_marker_compartment_fetal_kidney_Stewart.csv"
  )
)

Find marker genes for each of the cell types

# Differential expression between cell types
de_results <- DElegate::FindAllMarkers2(
  fetal_kidney,
  group_column = "cell_type"
)
## Warning in size + sum(size_args, na.rm = FALSE): NAs produced by integer
## overflow

# Filter the most relevant markers using the thresholds from `params`.
# dplyr::filter() drops rows whose condition evaluates to NA, whereas logical
# `[` indexing turns them into all-NA rows that would show up in the table.
# NOTE(review): `params` is presumably defined in the R Markdown YAML header,
# which is not visible here -- confirm the three thresholds exist.
s.markers <- dplyr::filter(
  de_results,
  padj < params$padj_threshold,
  log_fc > params$lfc_threshold,
  rate1 > params$rate1_threshold
)

# Interactive table of the filtered markers with csv/excel export buttons
DT::datatable(
  s.markers,
  caption = "marker genes",
  extensions = "Buttons",
  options = list(dom = "Bfrtip", buttons = c("csv", "excel"))
)

# Select top 5 genes per cell type for heatmap plotting.
# slice_max() replaces the superseded top_n(); like top_n() it keeps ties, and
# it returns rows ordered by group and decreasing log_fc. The result is
# ungrouped so no grouping leaks into later steps.
s.markers <- na.omit(s.markers)
top5 <- s.markers %>%
  group_by(group1) %>%
  slice_max(order_by = log_fc, n = 5) %>%
  ungroup()

# Subset for plotting: downsample to at most 100 cells per cell type and
# scale only the genes shown in the heatmap
Idents(fetal_kidney) <- fetal_kidney$cell_type
cells <- WhichCells(fetal_kidney, downsample = 100)
ss <- subset(fetal_kidney, cells = cells)
ss <- ScaleData(ss, features = top5$feature)

# Diverging palette used for the heatmap fill
heatmap_colors <- c(
  "#01665e", "#35978f", "darkslategray3", "#f7f7f7",
  "#fee391", "#fec44f", "#F9AD03"
)

# p1: UMAP by cell type; p2: heatmap of the top markers on the downsampled
# cells; p3: number of cells per cell type (x labels rotated to stay legible)
p1 <- SCpubr::do_DimPlot(
  fetal_kidney,
  reduction = "umap",
  group.by = "cell_type",
  label = TRUE,
  repel = TRUE
) +
  ggtitle("cell_type") +
  NoLegend()
p2 <- DoHeatmap(
  ss,
  features = top5$feature,
  cells = cells,
  group.by = "cell_type"
) +
  NoLegend() +
  scale_fill_gradientn(colors = heatmap_colors)
p3 <- ggplot(fetal_kidney@meta.data, aes(cell_type, fill = cell_type)) +
  geom_bar() +
  NoLegend() +
  scale_x_discrete(guide = guide_axis(angle = 90))

# Assemble the figure: UMAP above the bar plot on the left, heatmap on the right
common_title <- sprintf(
  "Unsupervised clustering %s, %d cells",
  fetal_kidney@meta.data$orig.ident[1],
  ncol(fetal_kidney)
)
print(
  (((p1 / p3) + plot_layout(heights = c(3, 2)) | p2)) +
    plot_layout(widths = c(1, 1)) +
    plot_layout(heights = c(3, 1)) +
    plot_annotation(title = common_title)
)

# Save the complete (unfiltered) differential expression table
write_csv(
  de_results,
  file = file.path(
    path_to_output,
    "00a_marker_cell-type_fetal_kidney_Stewart.csv"
  )
)

Session info

# Record the R version, platform and package versions used for this run
sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: aarch64-unknown-linux-gnu
## Running under: Ubuntu 22.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/aarch64-linux-gnu/openblas-pthread/libblas.so.3 
## LAPACK: /usr/lib/aarch64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so;  LAPACK version 3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Etc/UTC
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] patchwork_1.2.0    lubridate_1.9.3    forcats_1.0.0      stringr_1.5.1     
##  [5] dplyr_1.1.4        purrr_1.0.2        readr_2.1.5        tidyr_1.3.1       
##  [9] tibble_3.2.1       ggplot2_3.5.1      tidyverse_2.0.0    SCpubr_2.0.2      
## [13] Azimuth_0.5.0      shinyBS_0.61.1     Seurat_5.1.0       SeuratObject_5.0.2
## [17] sp_2.1-4           optparse_1.7.5    
## 
## loaded via a namespace (and not attached):
##   [1] fs_1.6.4                          ProtGenerics_1.36.0              
##   [3] matrixStats_1.3.0                 spatstat.sparse_3.1-0            
##   [5] bitops_1.0-8                      DirichletMultinomial_1.46.0      
##   [7] TFBSTools_1.42.0                  httr_1.4.7                       
##   [9] RColorBrewer_1.1-3                tools_4.4.1                      
##  [11] sctransform_0.4.1                 utf8_1.2.4                       
##  [13] R6_2.5.1                          DT_0.33                          
##  [15] lazyeval_0.2.2                    uwot_0.2.2                       
##  [17] rhdf5filters_1.16.0               withr_3.0.1                      
##  [19] gridExtra_2.3                     progressr_0.14.0                 
##  [21] cli_3.6.3                         Biobase_2.64.0                   
##  [23] spatstat.explore_3.3-2            fastDummies_1.7.4                
##  [25] EnsDb.Hsapiens.v86_2.99.0         shinyjs_2.1.0                    
##  [27] labeling_0.4.3                    sass_0.4.9                       
##  [29] spatstat.data_3.1-2               ggridges_0.5.6                   
##  [31] pbapply_1.7-2                     yulab.utils_0.1.7                
##  [33] Rsamtools_2.20.0                  R.utils_2.12.3                   
##  [35] parallelly_1.38.0                 limma_3.60.4                     
##  [37] BSgenome_1.72.0                   rstudioapi_0.16.0                
##  [39] RSQLite_2.3.7                     gridGraphics_0.5-1               
##  [41] generics_0.1.3                    BiocIO_1.14.0                    
##  [43] vroom_1.6.5                       crosstalk_1.2.1                  
##  [45] gtools_3.9.5                      ica_1.0-3                        
##  [47] spatstat.random_3.3-1             googlesheets4_1.1.1              
##  [49] GO.db_3.19.1                      Matrix_1.7-0                     
##  [51] fansi_1.0.6                       S4Vectors_0.42.1                 
##  [53] abind_1.4-5                       R.methodsS3_1.8.2                
##  [55] lifecycle_1.0.4                   edgeR_4.2.1                      
##  [57] yaml_2.3.10                       SummarizedExperiment_1.34.0      
##  [59] rhdf5_2.48.0                      SparseArray_1.4.8                
##  [61] Rtsne_0.17                        grid_4.4.1                       
##  [63] blob_1.2.4                        promises_1.3.0                   
##  [65] shinydashboard_0.7.2              crayon_1.5.3                     
##  [67] pwalign_1.0.0                     miniUI_0.1.1.1                   
##  [69] lattice_0.22-6                    cowplot_1.1.3                    
##  [71] GenomicFeatures_1.56.0            annotate_1.82.0                  
##  [73] KEGGREST_1.44.1                   pillar_1.9.0                     
##  [75] knitr_1.48                        GenomicRanges_1.56.1             
##  [77] rjson_0.2.22                      future.apply_1.11.2              
##  [79] codetools_0.2-20                  fastmatch_1.1-4                  
##  [81] leiden_0.4.3.1                    glue_1.7.0                       
##  [83] spatstat.univar_3.0-0             data.table_1.16.0                
##  [85] vctrs_0.6.5                       png_0.1-8                        
##  [87] spam_2.10-0                       cellranger_1.1.0                 
##  [89] gtable_0.3.5                      poweRlaw_0.80.0                  
##  [91] assertthat_0.2.1                  cachem_1.1.0                     
##  [93] xfun_0.47                         Signac_1.14.0                    
##  [95] S4Arrays_1.4.1                    mime_0.12                        
##  [97] pracma_2.4.4                      survival_3.7-0                   
##  [99] DElegate_1.2.1                    gargle_1.5.2                     
## [101] RcppRoll_0.3.1                    statmod_1.5.0                    
## [103] fitdistrplus_1.2-1                ROCR_1.0-11                      
## [105] nlme_3.1-166                      bit64_4.0.5                      
## [107] RcppAnnoy_0.0.22                  GenomeInfoDb_1.40.1              
## [109] rprojroot_2.0.4                   bslib_0.8.0                      
## [111] irlba_2.3.5.1                     KernSmooth_2.23-24               
## [113] SeuratDisk_0.0.0.9021             colorspace_2.1-1                 
## [115] seqLogo_1.70.0                    BiocGenerics_0.50.0              
## [117] DBI_1.2.3                         tidyselect_1.2.1                 
## [119] bit_4.0.5                         compiler_4.4.1                   
## [121] curl_5.2.2                        hdf5r_1.3.11                     
## [123] DelayedArray_0.30.1               plotly_4.10.4                    
## [125] rtracklayer_1.64.0                scales_1.3.0                     
## [127] caTools_1.18.2                    lmtest_0.9-40                    
## [129] rappdirs_0.3.3                    digest_0.6.37                    
## [131] goftest_1.2-3                     presto_1.0.0                     
## [133] spatstat.utils_3.1-0              rmarkdown_2.28                   
## [135] XVector_0.44.0                    htmltools_0.5.8.1                
## [137] pkgconfig_2.0.3                   sparseMatrixStats_1.16.0         
## [139] MatrixGenerics_1.16.0             highr_0.11                       
## [141] fastmap_1.2.0                     ensembldb_2.28.1                 
## [143] rlang_1.1.4                       htmlwidgets_1.6.4                
## [145] UCSC.utils_1.0.0                  shiny_1.9.1                      
## [147] farver_2.1.2                      jquerylib_0.1.4                  
## [149] zoo_1.8-12                        jsonlite_1.8.8                   
## [151] BiocParallel_1.38.0               R.oo_1.26.0                      
## [153] RCurl_1.98-1.16                   magrittr_2.0.3                   
## [155] ggplotify_0.1.2                   GenomeInfoDbData_1.2.12          
## [157] dotCall64_1.1-1                   Rhdf5lib_1.26.0                  
## [159] munsell_0.5.1                     Rcpp_1.0.13                      
## [161] viridis_0.6.5                     reticulate_1.38.0                
## [163] stringi_1.8.4                     zlibbioc_1.50.0                  
## [165] MASS_7.3-61                       plyr_1.8.9                       
## [167] parallel_4.4.1                    listenv_0.9.1                    
## [169] ggrepel_0.9.5                     deldir_2.0-4                     
## [171] CNEr_1.40.0                       Biostrings_2.72.1                
## [173] splines_4.4.1                     tensor_1.5                       
## [175] hms_1.1.3                         locfit_1.5-9.10                  
## [177] BSgenome.Hsapiens.UCSC.hg38_1.4.5 igraph_2.0.3                     
## [179] spatstat.geom_3.3-2               RcppHNSW_0.6.0                   
## [181] reshape2_1.4.4                    stats4_4.4.1                     
## [183] TFMPvalue_0.0.9                   XML_3.99-0.17                    
## [185] evaluate_0.24.0                   JASPAR2020_0.99.10               
## [187] tzdb_0.4.0                        httpuv_1.6.15                    
## [189] RANN_2.6.2                        getopt_1.20.4                    
## [191] polyclip_1.10-7                   future_1.34.0                    
## [193] SeuratData_0.2.2.9001             scattermore_1.2                  
## [195] xtable_1.8-4                      restfulr_0.0.15                  
## [197] AnnotationFilter_1.28.0           RSpectra_0.16-2                  
## [199] later_1.3.2                       googledrive_2.1.1                
## [201] viridisLite_0.4.2                 memoise_2.0.1                    
## [203] AnnotationDbi_1.66.0              GenomicAlignments_1.40.0         
## [205] IRanges_2.38.1                    cluster_2.1.6                    
## [207] timechange_0.3.0                  globals_0.16.3